#coding:utf8


# sudo apt-get install python-bs4
# BeautifulSoup is used for HTML parsing

import re
from bs4 import BeautifulSoup
import urllib2
import datetime
import smtplib
from email.mime.text import MIMEText
from email.utils import formatdate

def get_date():
    """Return today's date formatted as 'YYYY-MM-DD: ' (note trailing space)."""
    today = datetime.datetime.now()
    return '{:%Y-%m-%d}: '.format(today)

def get_current_chapter():
    """Read the last-seen chapter number from ./current_chapter.

    Returns the first line of the file with its trailing newline removed.
    """
    with open('./current_chapter') as tracker:
        return tracker.readline().strip('\n')
    
def set_current_chapter(chapter):
    """Persist *chapter* (a string) to ./current_chapter, replacing any
    previous contents.

    Opening with mode 'w' already truncates the file, so the explicit
    f.truncate() the original carried was redundant and has been removed.
    """
    with open('./current_chapter', 'w') as f:
        f.write(chapter)



def send_email(content):
    """Send *content* as an HTML e-mail to the hard-coded recipient list.

    content -- HTML body (a utf8 byte string assembled by publish_novel).
    Raises smtplib exceptions on connection/auth failure; no retry logic.
    """
    # SECURITY NOTE(review): credentials are hard-coded in source and sent
    # over a plain (non-TLS) SMTP session on port 25 — move to config/env
    # and prefer STARTTLS.
    mail_from = 'robot@feling.net' 
    host = 'smtp.feling.net'
    port = 25
    passwd = '123zxcASD'
    mail_to_list = ['chenyan@feling.net']
    
    # Build the MIME message; subject is the novel's title (Chinese).
    msg = MIMEText(content,_subtype='html',_charset='utf8')
    msg['From'] = '<%s>' % mail_from
    msg['To'] = ';'.join(mail_to_list)
    msg['Subject'] = u'网游三国之野人当道'
    msg['Date'] = formatdate(localtime=True) 
    
    # Plain SMTP session: connect, authenticate, send, quit.
    s = smtplib.SMTP(host, port)
    s.login(mail_from, passwd)
    s.sendmail(mail_from, mail_to_list, msg.as_string())
    s.quit()
    
    return 

def check_update():
    """Scrape the Tieba mobile listing for new chapter posts.

    Returns a list of post URLs, oldest first, for every chapter newer than
    the one recorded in ./current_chapter, and advances that record to the
    newest chapter seen.

    Bug fix: the original called set_current_chapter(max_chapter)
    unconditionally, so a scrape that found no chapters (page layout change,
    empty listing) wiped the stored chapter with an empty string. We now only
    advance the record when a chapter was actually found.
    """
    base = 'http://tieba.baidu.com/mo/q---EA83DE9C3F70D1FBF60B0522E4C92C5D%3AFG%3D1-sz%40320_240%2C-1-3-0--2--wapp_1424748909784_365/'
    url = base + 'm?kw=%E6%B2%89%E9%BB%98%E7%9A%84%E5%BF%A7%E4%BC%A4&lm=4&lp=5001&pinf=1_2_0'
    html = urllib2.urlopen(url).read()
    soup = BeautifulSoup(html, from_encoding="utf8")

    # Compile once instead of re-running findall twice per post.
    chapter_re = re.compile(r'第(\d{4})章.+</a>', re.I)
    href_re = re.compile(r'<a href="(.+)">', re.I)

    url_list = []
    max_chapter = ''
    current = get_current_chapter()  # file is stable for the loop's duration
    for post in (str(i) for i in soup.select('div[class~=i]')):
        matches = chapter_re.findall(post)
        # Skip non-chapter posts and known counterfeit ("山寨") uploads.
        if not matches or '山寨' in post:
            continue
        chapter = matches[0]
        if not max_chapter:
            max_chapter = chapter  # listing is newest-first
        if chapter == current:
            break  # everything below this is already published
        # Prepend so the returned list runs oldest -> newest.
        url_list.insert(0, base + href_re.findall(post)[0].replace('&amp;', '&'))

    if max_chapter:
        set_current_chapter(max_chapter)
    return url_list
    

def publish_novel(url_list):
    """Fetch each chapter post, reassemble its two pages, and e-mail it.

    url_list -- post URLs as returned by check_update(); may be empty.

    Each Tieba post body is split across two pages; the second page is
    reached via the "余下全文" ("rest of the text") link found on the first.
    Regex patterns are now raw strings and print is the parenthesized form
    (identical output under Python 2 for a single argument).
    """
    base = 'http://tieba.baidu.com/mo/q---EA83DE9C3F70D1FBF60B0522E4C92C5D%3AFG%3D1-sz%40320_240%2C-1-3-0--2--wapp_1424748909784_365/'
    for url in url_list:
        if not url:
            # Defensive: skip blank entries rather than fetching ''.
            continue
        html1 = urllib2.urlopen(url).read()
        soup1 = BeautifulSoup(html1, from_encoding="utf8")
        title = str(soup1.select('title')[0]).replace('<title>', '').replace('</title>', '') + '<br/>'
        segment1 = str(soup1.select('div[class=i]')[0])
        # Locate the link to the remainder of the post (page 2).
        next_href = re.findall(r'下一段</a> <a href="(.+)">余下全文</a>', segment1, re.I)[0]
        segment2_url = base + next_href.replace('&amp;', '&')

        html2 = urllib2.urlopen(segment2_url).read()
        soup2 = BeautifulSoup(html2, from_encoding="utf8")
        segment2 = str(soup2.select('div[class=i]')[0])

        # Strip page-1 pagination footer and the thread self-link, and
        # page-2's floor header plus trailing table markup.
        segment1 = re.sub(r'<br/>\(1/\d\)<a.+', '', segment1)
        segment1 = re.sub(r'.+>http://tieba.baidu.com/p/3151454928</a>', '', segment1)
        segment2 = re.sub(r'<div class="i">1楼. ', '', segment2)
        segment2 = re.sub(r'<table>.+', '', segment2)
        print(title)
        print(segment1)
        print(segment2)
        send_email(title + segment1 + segment2)

if __name__ == '__main__':
    # Run the scrape-and-mail pipeline only when executed as a script;
    # importing this module no longer triggers network and SMTP traffic.
    url_list = check_update()
    publish_novel(url_list)
